Last compiled: 2021-01-05
Goal
In the previous section, we predicted whether or not a product will be put on ‘backorder’ status using an H2O model. We now take the H2O models we developed and inspect, visualize, and communicate their performance to business stakeholders.
These are some relevant questions to ponder:
How do we visualize and compare the performance of the H2O models?
How can we combine multiple performance plots into a single figure with the cowplot package?
For this, I will be reusing the Product Backorders data set (source of raw data is linked below). You may download the data in case you want to try this code on your own.
Please note this is a continuation of the previous section.
Raw data source:
Download product_backorders.csv
As a first step, please load tidyverse and tidymodels libraries. For details on what these libraries offer, please refer to the comments in the code block below.
# STEP 1: Load Libraries ----
# NOTE: load order matters -- h2o is loaded after tidyverse so that any
# functions both packages export resolve to the package loaded last.
# Tidy, Transform, & Visualize
library(tidyverse)
# library(tibble) --> is a modern re-imagining of the data frame
# library(readr) --> provides a fast and friendly way to read rectangular data like csv
# library(dplyr) --> provides a grammar of data manipulation
# library(magrittr) --> offers a set of operators which make your code more readable (pipe operator)
# library(tidyr) --> provides a set of functions that help you get to tidy data
# library(stringr) --> provides a cohesive set of functions designed to make working with strings as easy as possible
# library(ggplot2) --> graphics
library(tidymodels)
# library(rsample) --> provides infrastructure for efficient data splitting, resampling and cross validation.
# library(parsnip) --> provides an API to many powerful modeling algorithms in R.
# library(recipes) --> tidy interface to data pre-processing (making statistical transformations) tools for feature engineering (prior to modeling).
# library(workflows) --> bundle your pre-processing, modeling, and post-processing together.
# library(tune) --> helps you optimize the hyperparameters of your model and pre-processing steps.
# library(yardstick) --> measures the effectiveness of models using performance metrics (metrics for model comparison).
# library(broom) --> converts the information in common statistical R objects into user-friendly, predictable formats.
# library(dials) --> creates and manages tuning parameters and parameter grids.
library(h2o) # H2O modeling (AutoML leaderboard, loading/saving models, performance metrics)
library(ggthemes) # Better themes for plotting and color palettes (e.g. scale_color_gdocs)
library(glue) # Implementation of interpreted string literals
library(cowplot) # Provides various features to help create publication-quality figures (plot_grid, ggdraw)
If you haven’t installed these packages, please install them by calling install.packages("name_of_package") in the R console. After installing, run the above code block again.
# Visualize the H2O leaderboard to help with model selection.
# Produces a long-format tibble with one row per model-metric pair,
# keeping only auc and logloss for plotting.
data_transformed_tbl <- automl_models_h2o@leaderboard %>%
  as_tibble() %>%
  # Keep only auc and logloss for the visualization
  select(-c(aucpr, mean_per_class_error, rmse, mse)) %>%
  # The model type is the leading token of model_id (text before the first "_")
  mutate(model_type = str_extract(model_id, "[^_]+")) %>%
  # Capture the leaderboard position; as_tibble() leaves rownames "1".."n"
  # (the previous no-op slice(1:n()) step was removed -- it selected all rows)
  rownames_to_column(var = "rowname") %>%
  # Visually this step will not change anything --
  # it reorders the factor levels under the hood
  mutate(
    model_id = as_factor(model_id) %>% reorder(auc),
    model_type = as.factor(model_type)
  ) %>%
  # Long format for faceting; fct_inorder keeps the metric columns in
  # their original order (auc before logloss)
  pivot_longer(cols = -c(model_id, model_type, rowname),
               names_to = "key",
               values_to = "value",
               names_transform = list(key = forcats::fct_inorder)
  ) %>%
  # Prefix each model id with its leaderboard position, e.g. "1. GBM_..."
  mutate(model_id = paste0(rowname, ". ", model_id) %>% as_factor() %>% fct_rev())
# Perform visualization: one point per model per metric, faceted by metric
data_transformed_tbl %>%
  ggplot(aes(value, model_id, color = model_type)) +
  geom_point(size = 3) +
  # Label each point with its metric value; FALSE spelled out instead of F
  geom_label(aes(label = round(value, 3), hjust = "inward"), show.legend = FALSE) +
  scale_color_gdocs() +
  # Facet to break out logloss and auc on independent x scales
  facet_wrap(~ toupper(key), scales = "free_x") +
  labs(title = "Leaderboard Metrics",
       subtitle = paste0("Ordered by: ", "AUC (Area Under the Curve)"),
       # Fixed typo in the axis label ("Postion" -> "Position")
       y = "Model Position, Model ID", x = "") +
  theme(legend.position = "bottom")
# Extracts an H2O model name (model_id) by leaderboard position so it can be
# fed straight into h2o.getModel().
#
# h2o_leaderboard: an H2O leaderboard (anything coercible to a tibble with a
#                  model_id column)
# n:               leaderboard row to extract (1 = best model)
# verbose:         if TRUE, message() the extracted model name
# Returns: the model_id string at position n
extract_h2o_model_name_by_position <- function(h2o_leaderboard, n = 1, verbose = TRUE) {
  model_name <- h2o_leaderboard %>%
    as_tibble() %>%  # as.tibble() is deprecated; as_tibble() is the current API
    slice(n) %>%
    pull(model_id)
  if (verbose) message(model_name)
  model_name
}
# Save several models to disk by extracting them from the leaderboard by
# position (rows 1-4 and 13-16).
for (position in c(1:4, 13:16)) {
  # Resolve the leaderboard row to a model_id, fetch it, then persist it
  model_name <- extract_h2o_model_name_by_position(automl_models_h2o@leaderboard, position)
  fetched_model <- h2o.getModel(model_name)
  h2o.saveModel(fetched_model, path = "00_h2o_models/03/")
}
# Loading the Distributed Random Forest model saved from the leaderboard above
drf_h2o <- h2o.loadModel("00_h2o_models/03/DRF_1_AutoML_20210105_210409")
# Take a look at the metrics on the training data set
# (auto-printing the model shows training, validation, and CV metrics below)
drf_h2o
## Model Details:
## ==============
##
## H2OBinomialModel: drf
## Model ID: DRF_1_AutoML_20210105_210409
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 2 2 18500 20 20 20.00000 717 735 726.00000
##
##
## H2OBinomialMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 0.1072041
## RMSE: 0.3274204
## LogLoss: 2.616794
## Mean Per-Class Error: 0.2841806
## AUC: 0.7429379
## AUCPR: 0.3779368
## Gini: 0.4858758
## R^2: 0.01656574
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 5135 490 0.087111 =490/5625
## Yes 385 415 0.481250 =385/800
## Totals 5520 905 0.136187 =875/6425
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.230769 0.486804 66
## 2 max f2 0.076923 0.530543 126
## 3 max f0point5 0.547170 0.504115 29
## 4 max accuracy 0.547170 0.884669 29
## 5 max precision 0.547170 0.555766 29
## 6 max recall 0.000000 1.000000 162
## 7 max specificity 1.000000 0.964267 0
## 8 max absolute_mcc 0.230769 0.409669 66
## 9 max min_per_class_accuracy 0.010000 0.668750 160
## 10 max mean_per_class_accuracy 0.150000 0.725928 94
## 11 max tns 1.000000 5424.000000 0
## 12 max fns 1.000000 573.000000 0
## 13 max fps 0.000000 5625.000000 162
## 14 max tps 0.000000 800.000000 162
## 15 max tnr 1.000000 0.964267 0
## 16 max fnr 1.000000 0.716250 0
## 17 max fpr 0.000000 1.000000 162
## 18 max tpr 0.000000 1.000000 162
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: drf
## ** Reported on validation data. **
##
## MSE: 0.08305517
## RMSE: 0.2881929
## LogLoss: 0.9986192
## Mean Per-Class Error: 0.2412791
## AUC: 0.8332329
## AUCPR: 0.4829946
## Gini: 0.6664658
## R^2: 0.2276674
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 2814 286 0.092258 =286/3100
## Yes 169 264 0.390300 =169/433
## Totals 2983 550 0.128786 =455/3533
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.303279 0.537131 75
## 2 max f2 0.093023 0.635733 126
## 3 max f0point5 0.527381 0.539542 46
## 4 max accuracy 0.527381 0.894990 46
## 5 max precision 0.722500 0.728972 13
## 6 max recall 0.000000 1.000000 159
## 7 max specificity 1.000000 0.992581 0
## 8 max absolute_mcc 0.303279 0.468035 75
## 9 max min_per_class_accuracy 0.093023 0.788065 126
## 10 max mean_per_class_accuracy 0.093023 0.795880 126
## 11 max tns 1.000000 3077.000000 0
## 12 max fns 1.000000 395.000000 0
## 13 max fps 0.000000 3100.000000 159
## 14 max tps 0.000000 433.000000 159
## 15 max tnr 1.000000 0.992581 0
## 16 max fnr 1.000000 0.912240 0
## 17 max fpr 0.000000 1.000000 159
## 18 max tpr 0.000000 1.000000 159
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: drf
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 0.0879822
## RMSE: 0.2966179
## LogLoss: 1.596596
## Mean Per-Class Error: 0.2351918
## AUC: 0.8026587
## AUCPR: 0.4375996
## Gini: 0.6053174
## R^2: 0.1596044
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 8484 995 0.104969 =995/9479
## Yes 467 811 0.365415 =467/1278
## Totals 8951 1806 0.135911 =1462/10757
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.241281 0.525940 222
## 2 max f2 0.117637 0.603437 293
## 3 max f0point5 0.512821 0.535790 102
## 4 max accuracy 0.514472 0.895231 101
## 5 max precision 0.668724 0.605960 53
## 6 max recall 0.000000 1.000000 399
## 7 max specificity 1.000000 0.980167 0
## 8 max absolute_mcc 0.363729 0.460063 173
## 9 max min_per_class_accuracy 0.045450 0.756651 350
## 10 max mean_per_class_accuracy 0.117637 0.775870 293
## 11 max tns 1.000000 9291.000000 0
## 12 max fns 1.000000 1046.000000 0
## 13 max fps 0.000000 9479.000000 399
## 14 max tps 0.000000 1278.000000 399
## 15 max tnr 1.000000 0.980167 0
## 16 max fnr 1.000000 0.818466 0
## 17 max fpr 0.000000 1.000000 399
## 18 max tpr 0.000000 1.000000 399
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid cv_5_valid
## accuracy 0.874683 0.025082039 0.8870818 0.89684016 0.8944677 0.8461181 0.8489075
## auc 0.8096728 0.050696757 0.73739433 0.775471 0.84736353 0.85114276 0.8369922
## aucpr 0.4580898 0.0669429 0.36909607 0.41765285 0.53625786 0.50547206 0.46197012
## err 0.12531696 0.025082039 0.11291821 0.10315985 0.10553231 0.15388192 0.15109251
## err_count 269.6 53.91475 243.0 222.0 227.0 331.0 325.0
##
## ---
## mean sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid cv_5_valid
## pr_auc 0.4580898 0.0669429 0.36909607 0.41765285 0.53625786 0.50547206 0.46197012
## precision 0.49845847 0.076821215 0.5261044 0.5685484 0.5650224 0.41555557 0.4170616
## r2 0.15971397 0.12981917 -0.015012705 0.058439042 0.27842999 0.2473892 0.22932433
## recall 0.5956434 0.109153345 0.51171875 0.55078125 0.4921875 0.73333335 0.6901961
## rmse 0.2959151 0.022741947 0.32616144 0.31413844 0.27505746 0.2804362 0.2837819
## specificity 0.91233635 0.042805854 0.9377637 0.9435654 0.94881266 0.86128694 0.87025315
# We want to see how it performs for the testing data frame
# Make sure to convert it to an H2O frame first via as.h2o()
h2o.performance(drf_h2o, newdata = as.h2o(test_tbl))
##
|
| | 0%
|
|=============================================================================================================================================| 100%
## H2OBinomialMetrics: drf
##
## MSE: 0.08314183
## RMSE: 0.2883433
## LogLoss: 0.9391007
## Mean Per-Class Error: 0.2483905
## AUC: 0.8287744
## AUCPR: 0.4607902
## Gini: 0.6575489
## R^2: 0.1923708
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 3778 430 0.102186 =430/4208
## Yes 219 336 0.394595 =219/555
## Totals 3997 766 0.136259 =649/4763
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.276923 0.508706 82
## 2 max f2 0.093023 0.610127 142
## 3 max f0point5 0.593023 0.497038 29
## 4 max accuracy 0.627451 0.896494 21
## 5 max precision 0.875000 0.712963 4
## 6 max recall 0.000000 1.000000 176
## 7 max specificity 1.000000 0.993584 0
## 8 max absolute_mcc 0.276923 0.439501 82
## 9 max min_per_class_accuracy 0.093023 0.773527 142
## 10 max mean_per_class_accuracy 0.093023 0.784061 142
## 11 max tns 1.000000 4181.000000 0
## 12 max fns 1.000000 499.000000 0
## 13 max fps 0.000000 4208.000000 176
## 14 max tps 0.000000 555.000000 176
## 15 max tnr 1.000000 0.993584 0
## 16 max fnr 1.000000 0.899099 0
## 17 max fpr 0.000000 1.000000 176
## 18 max tpr 0.000000 1.000000 176
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
# Manual grid search over the number of trees for a Distributed Random Forest
drf_grid_01 <- h2o.grid(
# See help page for available algorithms via ?h2o.grid()
algorithm = "randomForest",
# Use the same as the object
grid_id = "drf_grid_01",
# predictor and response variables
x = x,
y = y,
# training and validation frame and crossfold validation
training_frame = train_h2o,
validation_frame = valid_h2o,
nfolds = 5,
# Hyperparameters: use drf_h2o@allparameters to see all tunable ones
hyper_params = list(
# Use different number of trees to find a better model
ntrees = c(5, 10, 15, 20, 50, 60, 70, 120, 140, 160, 250)
)
)
# Auto-print the grid results, ordered by increasing logloss
drf_grid_01
## H2O Grid Details
## ================
##
## Grid ID: drf_grid_01
## Used hyper parameters:
## - ntrees
## Number of models: 11
## Number of failed models: 0
##
## Hyper-Parameter Search Summary: ordered by increasing logloss
## ntrees model_ids logloss
## 1 160 drf_grid_01_model_10 0.20366888047692755
## 2 250 drf_grid_01_model_11 0.2039207640599753
## 3 50 drf_grid_01_model_5 0.20414687844717408
## 4 120 drf_grid_01_model_8 0.20627709249140205
## 5 140 drf_grid_01_model_9 0.2072672887955163
## 6 70 drf_grid_01_model_7 0.2101568549583345
## 7 20 drf_grid_01_model_4 0.2104053798292038
## 8 60 drf_grid_01_model_6 0.21082877559021032
## 9 15 drf_grid_01_model_3 0.21533393083906668
## 10 10 drf_grid_01_model_2 0.23012189452320658
## 11 5 drf_grid_01_model_1 0.35749334025103174
# Retrieve the same grid from the H2O cluster, this time ordered by decreasing AUC
h2o.getGrid(grid_id = "drf_grid_01", sort_by = "auc", decreasing = TRUE)
## H2O Grid Details
## ================
##
## Grid ID: drf_grid_01
## Used hyper parameters:
## - ntrees
## Number of models: 11
## Number of failed models: 0
##
## Hyper-Parameter Search Summary: ordered by decreasing auc
## ntrees model_ids auc
## 1 250 drf_grid_01_model_11 0.9418734040373573
## 2 160 drf_grid_01_model_10 0.941701580348686
## 3 140 drf_grid_01_model_9 0.9389589638969662
## 4 50 drf_grid_01_model_5 0.9387205239619546
## 5 120 drf_grid_01_model_8 0.9385640129296604
## 6 70 drf_grid_01_model_7 0.9360732917390407
## 7 60 drf_grid_01_model_6 0.9351195732730007
## 8 20 drf_grid_01_model_4 0.9306097276889644
## 9 15 drf_grid_01_model_3 0.9242986019173262
## 10 10 drf_grid_01_model_2 0.9166785948545183
## 11 5 drf_grid_01_model_1 0.8651228619858312
# Fetch the best grid model (ntrees = 160, lowest logloss) from the cluster
drf_grid_01_model_10 <- h2o.getModel("drf_grid_01_model_10")
# Compare AUC on training, validation, and cross-validation data.
# TRUE spelled out instead of the reassignable T shorthand.
drf_grid_01_model_10 %>% h2o.auc(train = TRUE, valid = TRUE, xval = TRUE)
## train valid xval
## 0.9390323 0.9491384 0.9417016
# The model is not overfitting because there's a small difference between the
# training AUC and the validation / cross validation AUC
# Run it with test data and compare to the results from "drf_h2o" model above
drf_grid_01_model_10 %>%
  h2o.performance(newdata = as.h2o(test_tbl))
##
|
| | 0%
|
|=============================================================================================================================================| 100%
## H2OBinomialMetrics: drf
##
## MSE: 0.05908119
## RMSE: 0.2430662
## LogLoss: 0.2030881
## Mean Per-Class Error: 0.1685522
## AUC: 0.938512
## AUCPR: 0.7118037
## Gini: 0.877024
## R^2: 0.4260928
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 3995 213 0.050618 =213/4208
## Yes 159 396 0.286486 =159/555
## Totals 4154 609 0.078102 =372/4763
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.315582 0.680412 162
## 2 max f2 0.223879 0.741284 206
## 3 max f0point5 0.386511 0.712484 131
## 4 max accuracy 0.386511 0.930086 131
## 5 max precision 0.857647 1.000000 0
## 6 max recall 0.019339 1.000000 368
## 7 max specificity 0.857647 1.000000 0
## 8 max absolute_mcc 0.315582 0.636924 162
## 9 max min_per_class_accuracy 0.170337 0.862167 236
## 10 max mean_per_class_accuracy 0.140098 0.866344 255
## 11 max tns 0.857647 4208.000000 0
## 12 max fns 0.857647 554.000000 0
## 13 max fps 0.001456 4208.000000 399
## 14 max tps 0.019339 555.000000 368
## 15 max tnr 0.857647 1.000000 0
## 16 max fnr 0.857647 0.998198 0
## 17 max fpr 0.001456 1.000000 399
## 18 max tpr 0.019339 1.000000 368
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
# Loading top H2O model (the AutoML leader: a stacked ensemble over all models)
stacked_ensemble_h2o <- h2o.loadModel("00_h2o_models/03/StackedEnsemble_AllModels_AutoML_20210105_210409")
# Score the ensemble on the hold-out test set
performance_h2o <- h2o.performance(stacked_ensemble_h2o, newdata = as.h2o(test_tbl))
typeof(performance_h2o)
## [1] "S4"
performance_h2o %>% slotNames()
## [1] "algorithm" "on_train" "on_valid" "on_xval" "metrics"
# Per-threshold metrics as a tibble (one row per candidate threshold)
performance_tbl <- performance_h2o %>%
  h2o.metric() %>%
  as_tibble()  # as.tibble() is deprecated; as_tibble() is the current API
performance_tbl %>%
  glimpse()
## Rows: 400
## Columns: 20
## $ threshold [3m[38;5;246m<dbl>[39m[23m 0.9916312, 0.9894357, 0.9873502, 0.9851112, 0.9834827, 0.9818633, 0.9804420, 0.9790831, 0.9763741, 0.9750580, 0.9742…
## $ f1 [3m[38;5;246m<dbl>[39m[23m 0.01075269, 0.02486679, 0.04912281, 0.06608696, 0.07279029, 0.08261618, 0.08888889, 0.10792580, 0.12060302, 0.120401…
## $ f2 [3m[38;5;246m<dbl>[39m[23m 0.006747638, 0.015709156, 0.031319911, 0.042410714, 0.046833185, 0.053428317, 0.057777778, 0.070859167, 0.079575597,…
## $ f0point5 [3m[38;5;246m<dbl>[39m[23m 0.02645503, 0.05962521, 0.11382114, 0.14960630, 0.16329705, 0.18209408, 0.19259259, 0.22630835, 0.24896266, 0.247592…
## $ accuracy [3m[38;5;246m<dbl>[39m[23m 0.8841067, 0.8847365, 0.8862062, 0.8872559, 0.8876758, 0.8880957, 0.8880957, 0.8889355, 0.8897754, 0.8895654, 0.8908…
## $ precision [3m[38;5;246m<dbl>[39m[23m 1.0000000, 0.8750000, 0.9333333, 0.9500000, 0.9545455, 0.9230769, 0.8666667, 0.8421053, 0.8571429, 0.8372093, 0.8181…
## $ recall [3m[38;5;246m<dbl>[39m[23m 0.005405405, 0.012612613, 0.025225225, 0.034234234, 0.037837838, 0.043243243, 0.046846847, 0.057657658, 0.064864865,…
## $ specificity [3m[38;5;246m<dbl>[39m[23m 1.0000000, 0.9997624, 0.9997624, 0.9997624, 0.9997624, 0.9995247, 0.9990494, 0.9985741, 0.9985741, 0.9983365, 0.9976…
## $ absolute_mcc [3m[38;5;246m<dbl>[39m[23m 0.06912713, 0.09696349, 0.14308945, 0.16868566, 0.17792138, 0.18623637, 0.18613709, 0.20280266, 0.21772039, 0.214390…
## $ min_per_class_accuracy [3m[38;5;246m<dbl>[39m[23m 0.005405405, 0.012612613, 0.025225225, 0.034234234, 0.037837838, 0.043243243, 0.046846847, 0.057657658, 0.064864865,…
## $ mean_per_class_accuracy [3m[38;5;246m<dbl>[39m[23m 0.5027027, 0.5061875, 0.5124938, 0.5169983, 0.5188001, 0.5213840, 0.5229481, 0.5281159, 0.5317195, 0.5316007, 0.5393…
## $ tns [3m[38;5;246m<dbl>[39m[23m 4208, 4207, 4207, 4207, 4207, 4206, 4204, 4202, 4202, 4201, 4198, 4196, 4195, 4194, 4193, 4193, 4193, 4193, 4192, 41…
## $ fns [3m[38;5;246m<dbl>[39m[23m 552, 548, 541, 536, 534, 531, 529, 523, 519, 519, 510, 509, 506, 503, 500, 497, 494, 490, 488, 484, 477, 470, 465, 4…
## $ fps [3m[38;5;246m<dbl>[39m[23m 0, 1, 1, 1, 1, 2, 4, 6, 6, 7, 10, 12, 13, 14, 15, 15, 15, 15, 16, 17, 17, 18, 19, 20, 21, 22, 22, 22, 23, 24, 25, 25…
## $ tps [3m[38;5;246m<dbl>[39m[23m 3, 7, 14, 19, 21, 24, 26, 32, 36, 36, 45, 46, 49, 52, 55, 58, 61, 65, 67, 71, 78, 85, 90, 92, 97, 102, 107, 110, 114…
## $ tnr [3m[38;5;246m<dbl>[39m[23m 1.0000000, 0.9997624, 0.9997624, 0.9997624, 0.9997624, 0.9995247, 0.9990494, 0.9985741, 0.9985741, 0.9983365, 0.9976…
## $ fnr [3m[38;5;246m<dbl>[39m[23m 0.9945946, 0.9873874, 0.9747748, 0.9657658, 0.9621622, 0.9567568, 0.9531532, 0.9423423, 0.9351351, 0.9351351, 0.9189…
## $ fpr [3m[38;5;246m<dbl>[39m[23m 0.0000000000, 0.0002376426, 0.0002376426, 0.0002376426, 0.0002376426, 0.0004752852, 0.0009505703, 0.0014258555, 0.00…
## $ tpr [3m[38;5;246m<dbl>[39m[23m 0.005405405, 0.012612613, 0.025225225, 0.034234234, 0.037837838, 0.043243243, 0.046846847, 0.057657658, 0.064864865,…
## $ idx [3m[38;5;246m<int>[39m[23m 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31…
# Custom ggplot2 theme reused by all plots below: bottom legend, transparent
# panel with a thin black border, and light grey major grid lines.
# NOTE(review): the `size` argument of element_rect()/element_line() was
# deprecated in favor of `linewidth` in ggplot2 3.4.0 -- confirm against the
# ggplot2 version in use before upgrading.
theme_new <- theme(
legend.position = "bottom",
legend.title = element_text(size = 11),
legend.text = element_text(size = 9),
legend.key = element_blank(),
panel.background = element_rect(fill = "transparent"),
panel.border = element_rect(color = "black", fill = NA, size = 0.5),
panel.grid.major = element_line(color = "grey", size = 0.333)
)
# Row(s) where F1 is maximized -- the threshold H2O uses by default for the
# confusion matrix ("F1-optimal threshold")
performance_tbl %>%
filter(f1 == max(f1))
# Precision and recall as functions of the classification threshold
performance_tbl %>%
ggplot(aes(x = threshold)) +
geom_line(aes(y = precision, color = "Precision"), size = 0.5) +
geom_line(aes(y = recall, color = "Recall"), size = 0.5) +
scale_color_manual(breaks = c("Precision", "Recall"),
values = c("blue", "red")) +
# Insert line where precision and recall are harmonically optimized (max F1)
geom_vline(xintercept = h2o.find_threshold_by_max_metric(performance_h2o, "f1")) +
labs(
title = "Precision vs. Recall",
y = "Value",
x = "Threshold") +
theme_new
# Load a saved H2O model from disk, score it on test_tbl, and return its
# per-threshold tpr/fpr together with the model's overall AUC.
load_model_performance_metrics <- function(path, test_tbl) {
  loaded_model <- h2o.loadModel(path)
  performance  <- h2o.performance(loaded_model, newdata = as.h2o(test_tbl))
  metrics_tbl <- performance %>%
    h2o.metric() %>%
    as_tibble()
  metrics_tbl %>%
    mutate(auc = h2o.auc(performance)) %>%
    select(tpr, fpr, auc)
}
# Score every model saved under 00_h2o_models/03/ on the test set and
# collect tpr/fpr/auc per model for the ROC plot
model_metrics_tbl <- fs::dir_info(path = "00_h2o_models/03/") %>%
  select(path) %>%
  mutate(metrics = map(path, load_model_performance_metrics, test_tbl)) %>%
  unnest(cols = metrics)
model_metrics_tbl %>%
  arrange(desc(auc)) %>%
  mutate(
    # Extract the model names (third path component);
    # TRUE spelled out instead of the reassignable T shorthand
    PATH = str_split(path, pattern = "/", simplify = TRUE)[, 3] %>% as_factor(),
    AUC = auc %>% round(4) %>% as.character() %>% as_factor()
  ) %>%
  ggplot(aes(fpr, tpr, color = PATH, linetype = AUC)) +
  geom_line(size = 0.75) +
  scale_color_gdocs() +
  # Diagonal reference line -- just for demonstration purposes
  geom_abline(color = "black", linetype = "dotted", size = 0.75) +
  theme_minimal() +
  theme_new +
  theme(legend.direction = "vertical") +
  labs(title = "ROC (Receiver Operating Characteristic) Plot",
       subtitle = "Performance of Top 4 & Bottom 4 Performing Models",
       y = "TPR",
       x = "FPR")
# Redefinition of the loader above: identical scoring, but the returned tibble
# also keeps precision and recall so a precision-recall curve can be drawn.
load_model_performance_metrics <- function(path, test_tbl) {
  perf <- h2o.performance(h2o.loadModel(path), newdata = as.h2o(test_tbl))
  h2o.metric(perf) %>%
    as_tibble() %>%
    mutate(auc = h2o.auc(perf)) %>%
    select(tpr, fpr, auc, precision, recall)
}
# Re-score every saved model with the extended metrics (precision/recall)
# for the per-model precision-vs-recall plot
model_metrics_tbl <- fs::dir_info(path = "00_h2o_models/03/") %>%
  select(path) %>%
  mutate(metrics = map(path, load_model_performance_metrics, test_tbl)) %>%
  unnest(cols = metrics)
model_metrics_tbl %>%
  arrange(desc(auc)) %>%
  mutate(
    # Extract the model names (third path component);
    # TRUE spelled out instead of the reassignable T shorthand
    PATH = str_split(path, pattern = "/", simplify = TRUE)[, 3] %>% as_factor(),
    AUC = auc %>% round(4) %>% as.character() %>% as_factor()
  ) %>%
  ggplot(aes(recall, precision, color = PATH, linetype = AUC)) +
  geom_line(size = 0.75) +
  scale_color_gdocs() +
  theme_minimal() +
  theme_new +
  theme(legend.direction = "vertical") +
  labs(title = "Precision vs Recall Plot",
       subtitle = "Performance of Top 4 & Bottom 4 Performing Models",
       y = "Precision",
       x = "Recall")
# Table for Gain and Lift plotting (cumulative metrics per prediction decile)
gain_lift_tbl <- performance_h2o %>%
  h2o.gainsLift() %>%
  as_tibble()  # as.tibble() is deprecated; as_tibble() is the current API
## Gain Plot
# Keep only the gain-related columns and add the random-guess baseline
# (gain of a random model equals the data fraction sampled)
gain_transformed_tbl <- gain_lift_tbl %>%
  select(group, cumulative_data_fraction, cumulative_capture_rate) %>%
  rename(gain = cumulative_capture_rate) %>%
  mutate(baseline = cumulative_data_fraction) %>%
  # Long format so color can distinguish gain from baseline
  pivot_longer(cols = c(gain, baseline), values_to = "value", names_to = "key")
gain_transformed_tbl %>%
  ggplot(aes(x = cumulative_data_fraction, y = value, color = key)) +
  geom_line(size = 0.5) +
  scale_color_gdocs() +
  theme_minimal() +
  theme_new +
  labs(title = "Gain Chart",
       x = "Cumulative Data Fraction",
       y = "Gain")
## Lift Plot
# Keep only the lift-related columns; a random model has a constant lift of 1
lift_transformed_tbl <- gain_lift_tbl %>%
  select(group, cumulative_data_fraction, cumulative_lift) %>%
  rename(lift = cumulative_lift) %>%
  mutate(baseline = 1) %>%
  # Long format so color can distinguish lift from baseline
  pivot_longer(cols = c(lift, baseline), values_to = "value", names_to = "key")
lift_transformed_tbl %>%
  ggplot(aes(x = cumulative_data_fraction, y = value, color = key)) +
  geom_line(size = 0.5) +
  scale_color_gdocs() +
  theme_minimal() +
  theme_new +
  labs(title = "Lift Chart",
       x = "Cumulative Data Fraction",
       y = "Lift")
# Plot ROC, Precision-vs-Recall, Gain, and Lift for the top and bottom models
# of an H2O leaderboard, scored on new data, combined into one cowplot figure.
#
# h2o_leaderboard: H2O AutoML leaderboard (anything coercible to a tibble)
# newdata:         data to score the models on (e.g. the test tibble)
# order_by:        metric used to order/label models: "auc" or "logloss"
#                  (first element is used if a vector is supplied)
# top_models:      number of models taken from the top of the leaderboard
# bottom_models:   number of models taken from the bottom of the leaderboard
# size:            line size passed to geom_line()
# Returns: a cowplot grid (ggplot object): title, subtitle, 4 panels, legend
plot_h2o_performance <- function(h2o_leaderboard, newdata, order_by = c("auc", "logloss"),
                                 top_models = 2, bottom_models = 2, size = 1.5) {
  # Inputs: keep only the first `top_models` and last `bottom_models` rows
  leaderboard_tbl <- h2o_leaderboard %>%
    as_tibble() %>%
    slice(1:top_models, (n() - bottom_models + 1):n())
  newdata_tbl <- newdata %>%
    as_tibble()
  # Selecting the first, if nothing is provided
  order_by <- tolower(order_by[[1]])
  # Convert string stored in a variable to a column name (symbol)
  order_by_expr <- rlang::sym(order_by)
  # Turn off the progress bars (opposite of h2o.show_progress());
  # on.exit() guarantees they are restored even if an error occurs below
  h2o.no_progress()
  on.exit(h2o.show_progress(), add = TRUE)
  # 1. Model Metrics
  # Per-threshold metrics (tpr/fpr/precision/recall) for a single model id
  get_model_performance_metrics <- function(model_id, test_tbl) {
    model_h2o <- h2o.getModel(model_id)
    perf_h2o <- h2o.performance(model_h2o, newdata = as.h2o(test_tbl))
    perf_h2o %>%
      h2o.metric() %>%
      as_tibble() %>%  # as.tibble() is deprecated
      select(threshold, tpr, fpr, precision, recall)
  }
  model_metrics_tbl <- leaderboard_tbl %>%
    mutate(metrics = map(model_id, get_model_performance_metrics, newdata_tbl)) %>%
    unnest(cols = metrics) %>%
    mutate(model_id = as_factor(model_id) %>%
             # programmatically reorder factors depending on order_by
             # (descending for auc, ascending for logloss)
             fct_reorder(!! order_by_expr,
                         .desc = order_by == "auc"),
           auc = auc %>%
             round(3) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)),
           logloss = logloss %>%
             round(4) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)))
  ## 1A. ROC Plot
  p1 <- model_metrics_tbl %>%
    ggplot(aes(fpr, tpr, color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    labs(title = "ROC", x = "FPR", y = "TPR") +
    theme(legend.direction = "vertical")
  ## 1B. Precision vs Recall
  p2 <- model_metrics_tbl %>%
    ggplot(aes(recall, precision, color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    labs(title = "Precision Vs Recall", x = "Recall", y = "Precision") +
    theme(legend.position = "none")
  ## 2. Gain / Lift
  # Cumulative capture rate and lift per prediction group for one model id
  get_gain_lift <- function(model_id, test_tbl) {
    model_h2o <- h2o.getModel(model_id)
    perf_h2o <- h2o.performance(model_h2o, newdata = as.h2o(test_tbl))
    perf_h2o %>%
      h2o.gainsLift() %>%
      as_tibble() %>%  # as.tibble() is deprecated
      select(group, cumulative_data_fraction, cumulative_capture_rate, cumulative_lift)
  }
  gain_lift_tbl <- leaderboard_tbl %>%
    mutate(metrics = map(model_id, get_gain_lift, newdata_tbl)) %>%
    unnest(cols = metrics) %>%
    mutate(model_id = as_factor(model_id) %>%
             fct_reorder(!! order_by_expr,
                         .desc = order_by == "auc"),
           auc = auc %>%
             round(4) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)),
           logloss = logloss %>%
             round(4) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id))) %>%
    rename(gain = cumulative_capture_rate,
           lift = cumulative_lift)
  ## 2A. Gain Plot
  p3 <- gain_lift_tbl %>%
    ggplot(aes(cumulative_data_fraction, gain,
               color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +  # stray trailing comma removed here
    geom_segment(x = 0, y = 0, xend = 1, yend = 1,
                 color = "red", size = size, linetype = "dotted") +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    expand_limits(x = c(0, 1), y = c(0, 1)) +
    labs(title = "Gain", x = "Cumulative Data Fraction", y = "Gain") +
    theme(legend.position = "none")
  ## 2B. Lift Plot
  p4 <- gain_lift_tbl %>%
    ggplot(aes(cumulative_data_fraction, lift,
               color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    geom_segment(x = 0, y = 1, xend = 1, yend = 1,
                 color = "red", size = size, linetype = "dotted") +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    expand_limits(x = c(0, 1), y = c(0, 1)) +
    labs(title = "Lift", x = "Cumulative Data Fraction", y = "Lift") +
    theme(legend.position = "none")
  ### Combine using cowplot
  # cowplot::get_legend() extracts a legend from a ggplot object
  p_legend <- get_legend(p1)
  # Remove legend from p1 now that it has been captured separately
  p1 <- p1 + theme(legend.position = "none")
  # cowplot::plot_grid() combines multiple ggplots into a single cowplot object
  p <- cowplot::plot_grid(p1, p2, p3, p4, ncol = 2)
  # cowplot::ggdraw() sets up a drawing layer
  p_title <- ggdraw() +
    # cowplot::draw_label() draws text on a ggdraw layer / ggplot object
    draw_label(glue("Metrics for Top {top_models} & Bottom {bottom_models} H2O Models"),
               size = 18, fontface = "bold", color = "#2C3E50")
  p_subtitle <- ggdraw() +
    draw_label(glue("Ordered by {toupper(order_by)}"),
               size = 10, color = "#2C3E50")
  # Combine everything; rel_heights scales the legend slot with the number of
  # models so the legend always fits
  plot_grid(p_title, p_subtitle, p, p_legend,
            ncol = 1, rel_heights = c(0.05, 0.05, 1, 0.05 * (top_models + bottom_models)))
}
# Compare the 4 best and 4 worst leaderboard models on the hold-out test set,
# ordered by logloss
automl_models_h2o@leaderboard %>%
plot_h2o_performance(newdata = test_tbl, order_by = "logloss",
size = 0.75, bottom_models = 4, top_models = 4)
R!